Loading Libraries

# Accessing Libraries
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(broom)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

Loading Data Set

# Loading in Data Files
phouses <- read_csv("cities-month-SA.csv")
## Rows: 295 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (23): AZ-Phoenix, CA-Los Angeles, CA-San Diego, CA-San Francisco, CO-De...
## date  (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mortperf <- read_csv("nmdb-mortgage-performance-statistics-metros-quarterly.csv")
## Rows: 150400 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): SOURCE, FREQUENCY, GEOLEVEL, GEONAME, MARKET, PERIOD, MONTH, SERIESID
## dbl (5): GEOID, YEAR, QUARTER, SUPPRESSED, VALUE1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Cleaning Data

city_map <- data.frame(
  phouses_city = c("AZ-Phoenix", "CA-Los Angeles", "CA-San Diego", "CA-San Francisco",
                   "CO-Denver", "DC-Washington", "FL-Miami", "FL-Tampa", "GA-Atlanta",
                   "IL-Chicago", "MN-Minneapolis", "NC-Charlotte", "NV-Las Vegas",
                   "NY-New York", "OH-Cleveland", "OR-Portland", "TX-Dallas", "WA-Seattle"),
  geoname_pattern = c("Phoenix", "Los Angeles", "San Diego", "San Francisco",
                      "Denver", "Washington", "Miami", "Tampa", "Atlanta",
                      "Chicago", "Minneapolis", "Charlotte", "Las Vegas",
                      "New York", "Cleveland", "Portland", "Dallas", "Seattle")
) # Comparing lists so that only these cities (which appear in both lists) are accounted for

# Changing
mortperf$GEONAME <- as.character(mortperf$GEONAME) 

mortperf$city <- NA

for (i in seq_len(nrow(city_map))) {
  pattern <- city_map$geoname_pattern[i]
  city_name <- city_map$phouses_city[i]
  match_rows <- grepl(pattern, mortperf$GEONAME, ignore.case = TRUE)
  mortperf$city[match_rows] <- city_name
}

mortperf_filtered <- mortperf[!is.na(mortperf$city), ]

Combining the data sets

# Housing Data from wide to long (cities are columns)
phousesl <- phouses %>%
  pivot_longer(
    cols = -Date,
    names_to = "city",
    values_to = "house_value"
  )

# selecing only relevant mortgage data 
mortperfs <- mortperf_filtered %>%
  select(city, PERIOD, VALUE1, SERIESID) %>%
  rename(mortgage_value = VALUE1)

# Changing date realated data to different types for easier merging
phousesl <- phousesl %>%
  mutate(Date = as.Date(Date),
         year = format(Date, "%Y"),
         quarter = ceiling(as.numeric(format(Date, "%m")) / 3),
         period = paste0(year, "Q", quarter))

# combining both data sets
combined <- merge(
  phousesl,
  mortperfs,
  by.x = c("city", "period"),
  by.y = c("city", "PERIOD"),
  all = FALSE
)

Methods/Assumptions:

# Shapiro-Wilk test for normality
norm_city <- combined %>% 
  group_by(city) %>%
  summarize(
    shapiro = shapiro.test(house_value)$p.value,
    n = n()
  )
print(norm_city)
## # A tibble: 18 × 3
##    city              shapiro     n
##    <chr>               <dbl> <int>
##  1 AZ-Phoenix       4.22e-24  1840
##  2 CA-Los Angeles   3.73e-37  1840
##  3 CA-San Diego     3.56e-37  1840
##  4 CA-San Francisco 2.08e-36  1840
##  5 CO-Denver        2.34e-38  1840
##  6 DC-Washington    2.84e-36  1840
##  7 FL-Miami         6.87e-37  1840
##  8 FL-Tampa         8.02e-28  1840
##  9 GA-Atlanta       8.25e-39  1840
## 10 IL-Chicago       6.28e-29  1840
## 11 MN-Minneapolis   1.71e-32  1840
## 12 NC-Charlotte     1.25e-38  1840
## 13 NV-Las Vegas     1.25e-36  1840
## 14 NY-New York      2.48e-31  1840
## 15 OH-Cleveland     9.98e-38  1840
## 16 OR-Portland      1.64e-30  1840
## 17 TX-Dallas        6.05e-32  1840
## 18 WA-Seattle       8.16e-24  1840
# Levene's test for equal variance 
leveneTest(house_value ~ city, data = combined)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
##          Df F value    Pr(>F)    
## group    17  735.52 < 2.2e-16 ***
##       33102                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Kruskal-Wallis test and One Way ANOVA test

# Non Parametric Version of a one Way Anova
kruskal.test(house_value ~ city, data = combined)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  house_value by city
## Kruskal-Wallis chi-squared = 20061, df = 17, p-value < 2.2e-16
oneway.test(house_value ~ city, data = combined, var.equal = FALSE)
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  house_value and city
## F = 10286, num df = 17, denom df = 12190, p-value < 2.2e-16

Emmeans City Comparison

# Linear Model 
model <- lm(house_value ~ mortgage_value * city, data = combined)
# City by City Comparison analysis
emtrends(model, pairwise ~ city, var = "mortgage_value")
## $emtrends
##  city             mortgage_value.trend    SE    df lower.CL upper.CL
##  AZ-Phoenix                    -1.4581 0.335 24471   -2.114   -0.802
##  CA-Los Angeles                -5.8718 0.376 24471   -6.608   -5.135
##  CA-San Diego                   1.5758 0.459 24471    0.677    2.475
##  CA-San Francisco              26.7542 0.817 24471   25.153   28.355
##  CO-Denver                     -1.8996 0.573 24471   -3.023   -0.776
##  DC-Washington                 -3.0374 0.356 24471   -3.735   -2.340
##  FL-Miami                      -4.0559 0.192 24471   -4.432   -3.680
##  FL-Tampa                      -0.0129 0.310 24471   -0.621    0.595
##  GA-Atlanta                    -4.7086 0.334 24471   -5.364   -4.054
##  IL-Chicago                    -0.0377 0.353 24471   -0.729    0.653
##  MN-Minneapolis                -0.6823 0.585 24471   -1.829    0.465
##  NC-Charlotte                   6.9644 0.412 24471    6.157    7.772
##  NV-Las Vegas                  -3.8346 0.237 24471   -4.299   -3.371
##  NY-New York                   -3.7989 0.370 24471   -4.524   -3.074
##  OH-Cleveland                  -3.1681 0.441 24471   -4.032   -2.304
##  OR-Portland                   -4.4882 0.556 24471   -5.577   -3.399
##  TX-Dallas                     -0.1548 0.427 24471   -0.992    0.683
##  WA-Seattle                    -1.9079 0.612 24471   -3.107   -0.709
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                               estimate    SE    df t.ratio p.value
##  (AZ-Phoenix) - (CA-Los Angeles)         4.41372 0.503 24471   8.771  <.0001
##  (AZ-Phoenix) - (CA-San Diego)          -3.03386 0.568 24471  -5.344  <.0001
##  (AZ-Phoenix) - (CA-San Francisco)     -28.21230 0.883 24471 -31.966  <.0001
##  (AZ-Phoenix) - (CO-Denver)              0.44154 0.664 24471   0.665  1.0000
##  (AZ-Phoenix) - (DC-Washington)          1.57936 0.488 24471   3.234  0.1083
##  (AZ-Phoenix) - (FL-Miami)               2.59778 0.386 24471   6.736  <.0001
##  (AZ-Phoenix) - (FL-Tampa)              -1.44515 0.456 24471  -3.167  0.1302
##  (AZ-Phoenix) - (GA-Atlanta)             3.25049 0.473 24471   6.873  <.0001
##  (AZ-Phoenix) - (IL-Chicago)            -1.42036 0.486 24471  -2.922  0.2402
##  (AZ-Phoenix) - (MN-Minneapolis)        -0.77582 0.674 24471  -1.151  0.9996
##  (AZ-Phoenix) - (NC-Charlotte)          -8.42251 0.531 24471 -15.869  <.0001
##  (AZ-Phoenix) - (NV-Las Vegas)           2.37650 0.410 24471   5.798  <.0001
##  (AZ-Phoenix) - (NY-New York)            2.34077 0.499 24471   4.693  0.0004
##  (AZ-Phoenix) - (OH-Cleveland)           1.71000 0.554 24471   3.089  0.1599
##  (AZ-Phoenix) - (OR-Portland)            3.03014 0.649 24471   4.671  0.0004
##  (AZ-Phoenix) - (TX-Dallas)             -1.30324 0.543 24471  -2.401  0.6072
##  (AZ-Phoenix) - (WA-Seattle)             0.44981 0.697 24471   0.645  1.0000
##  (CA-Los Angeles) - (CA-San Diego)      -7.44759 0.593 24471 -12.562  <.0001
##  (CA-Los Angeles) - (CA-San Francisco) -32.62602 0.899 24471 -36.292  <.0001
##  (CA-Los Angeles) - (CO-Denver)         -3.97219 0.685 24471  -5.795  <.0001
##  (CA-Los Angeles) - (DC-Washington)     -2.83436 0.517 24471  -5.478  <.0001
##  (CA-Los Angeles) - (FL-Miami)          -1.81595 0.422 24471  -4.305  0.0023
##  (CA-Los Angeles) - (FL-Tampa)          -5.85887 0.487 24471 -12.024  <.0001
##  (CA-Los Angeles) - (GA-Atlanta)        -1.16323 0.503 24471  -2.313  0.6739
##  (CA-Los Angeles) - (IL-Chicago)        -5.83409 0.515 24471 -11.322  <.0001
##  (CA-Los Angeles) - (MN-Minneapolis)    -5.18954 0.695 24471  -7.463  <.0001
##  (CA-Los Angeles) - (NC-Charlotte)     -12.83624 0.558 24471 -23.021  <.0001
##  (CA-Los Angeles) - (NV-Las Vegas)      -2.03722 0.444 24471  -4.587  0.0006
##  (CA-Los Angeles) - (NY-New York)       -2.07295 0.527 24471  -3.932  0.0104
##  (CA-Los Angeles) - (OH-Cleveland)      -2.70372 0.579 24471  -4.667  0.0004
##  (CA-Los Angeles) - (OR-Portland)       -1.38358 0.671 24471  -2.062  0.8379
##  (CA-Los Angeles) - (TX-Dallas)         -5.71697 0.569 24471 -10.046  <.0001
##  (CA-Los Angeles) - (WA-Seattle)        -3.96392 0.718 24471  -5.522  <.0001
##  (CA-San Diego) - (CA-San Francisco)   -25.17843 0.937 24471 -26.883  <.0001
##  (CA-San Diego) - (CO-Denver)            3.47540 0.734 24471   4.734  0.0003
##  (CA-San Diego) - (DC-Washington)        4.61323 0.580 24471   7.949  <.0001
##  (CA-San Diego) - (FL-Miami)             5.63164 0.497 24471  11.332  <.0001
##  (CA-San Diego) - (FL-Tampa)             1.58871 0.554 24471   2.870  0.2698
##  (CA-San Diego) - (GA-Atlanta)           6.28435 0.567 24471  11.076  <.0001
##  (CA-San Diego) - (IL-Chicago)           1.61350 0.578 24471   2.790  0.3195
##  (CA-San Diego) - (MN-Minneapolis)       2.25804 0.743 24471   3.037  0.1824
##  (CA-San Diego) - (NC-Charlotte)        -5.38865 0.616 24471  -8.742  <.0001
##  (CA-San Diego) - (NV-Las Vegas)         5.41036 0.516 24471  10.484  <.0001
##  (CA-San Diego) - (NY-New York)          5.37464 0.589 24471   9.124  <.0001
##  (CA-San Diego) - (OH-Cleveland)         4.74386 0.636 24471   7.458  <.0001
##  (CA-San Diego) - (OR-Portland)          6.06401 0.720 24471   8.417  <.0001
##  (CA-San Diego) - (TX-Dallas)            1.73062 0.627 24471   2.761  0.3382
##  (CA-San Diego) - (WA-Seattle)           3.48367 0.764 24471   4.557  0.0007
##  (CA-San Francisco) - (CO-Denver)       28.65383 0.998 24471  28.717  <.0001
##  (CA-San Francisco) - (DC-Washington)   29.79166 0.891 24471  33.444  <.0001
##  (CA-San Francisco) - (FL-Miami)        30.81007 0.839 24471  36.728  <.0001
##  (CA-San Francisco) - (FL-Tampa)        26.76715 0.874 24471  30.640  <.0001
##  (CA-San Francisco) - (GA-Atlanta)      31.46279 0.882 24471  35.656  <.0001
##  (CA-San Francisco) - (IL-Chicago)      26.79193 0.890 24471  30.119  <.0001
##  (CA-San Francisco) - (MN-Minneapolis)  27.43648 1.000 24471  27.309  <.0001
##  (CA-San Francisco) - (NC-Charlotte)    19.78978 0.915 24471  21.636  <.0001
##  (CA-San Francisco) - (NV-Las Vegas)    30.58880 0.850 24471  35.974  <.0001
##  (CA-San Francisco) - (NY-New York)     30.55307 0.897 24471  34.080  <.0001
##  (CA-San Francisco) - (OH-Cleveland)    29.92230 0.928 24471  32.241  <.0001
##  (CA-San Francisco) - (OR-Portland)     31.24244 0.988 24471  31.628  <.0001
##  (CA-San Francisco) - (TX-Dallas)       26.90905 0.922 24471  29.194  <.0001
##  (CA-San Francisco) - (WA-Seattle)      28.66210 1.020 24471  28.092  <.0001
##  (CO-Denver) - (DC-Washington)           1.13783 0.675 24471   1.686  0.9694
##  (CO-Denver) - (FL-Miami)                2.15624 0.604 24471   3.567  0.0387
##  (CO-Denver) - (FL-Tampa)               -1.88669 0.652 24471  -2.894  0.2555
##  (CO-Denver) - (GA-Atlanta)              2.80896 0.664 24471   4.233  0.0031
##  (CO-Denver) - (IL-Chicago)             -1.86190 0.673 24471  -2.766  0.3346
##  (CO-Denver) - (MN-Minneapolis)         -1.21736 0.819 24471  -1.486  0.9917
##  (CO-Denver) - (NC-Charlotte)           -8.86405 0.706 24471 -12.556  <.0001
##  (CO-Denver) - (NV-Las Vegas)            1.93497 0.620 24471   3.120  0.1478
##  (CO-Denver) - (NY-New York)             1.89924 0.682 24471   2.784  0.3231
##  (CO-Denver) - (OH-Cleveland)            1.26846 0.723 24471   1.754  0.9559
##  (CO-Denver) - (OR-Portland)             2.58861 0.798 24471   3.242  0.1057
##  (CO-Denver) - (TX-Dallas)              -1.74478 0.715 24471  -2.440  0.5768
##  (CO-Denver) - (WA-Seattle)              0.00827 0.838 24471   0.010  1.0000
##  (DC-Washington) - (FL-Miami)            1.01841 0.404 24471   2.520  0.5144
##  (DC-Washington) - (FL-Tampa)           -3.02451 0.472 24471  -6.408  <.0001
##  (DC-Washington) - (GA-Atlanta)          1.67113 0.488 24471   3.424  0.0615
##  (DC-Washington) - (IL-Chicago)         -2.99973 0.501 24471  -5.989  <.0001
##  (DC-Washington) - (MN-Minneapolis)     -2.35518 0.685 24471  -3.439  0.0586
##  (DC-Washington) - (NC-Charlotte)      -10.00188 0.544 24471 -18.376  <.0001
##  (DC-Washington) - (NV-Las Vegas)        0.79714 0.427 24471   1.866  0.9245
##  (DC-Washington) - (NY-New York)         0.76141 0.513 24471   1.484  0.9918
##  (DC-Washington) - (OH-Cleveland)        0.13064 0.567 24471   0.231  1.0000
##  (DC-Washington) - (OR-Portland)         1.45078 0.660 24471   2.199  0.7548
##  (DC-Washington) - (TX-Dallas)          -2.88261 0.556 24471  -5.184  <.0001
##  (DC-Washington) - (WA-Seattle)         -1.12956 0.708 24471  -1.596  0.9822
##  (FL-Miami) - (FL-Tampa)                -4.04293 0.365 24471 -11.088  <.0001
##  (FL-Miami) - (GA-Atlanta)               0.65271 0.385 24471   1.694  0.9680
##  (FL-Miami) - (IL-Chicago)              -4.01814 0.401 24471 -10.013  <.0001
##  (FL-Miami) - (MN-Minneapolis)          -3.37360 0.616 24471  -5.479  <.0001
##  (FL-Miami) - (NC-Charlotte)           -11.02029 0.454 24471 -24.255  <.0001
##  (FL-Miami) - (NV-Las Vegas)            -0.22128 0.305 24471  -0.726  1.0000
##  (FL-Miami) - (NY-New York)             -0.25700 0.417 24471  -0.617  1.0000
##  (FL-Miami) - (OH-Cleveland)            -0.88778 0.481 24471  -1.847  0.9307
##  (FL-Miami) - (OR-Portland)              0.43237 0.588 24471   0.736  1.0000
##  (FL-Miami) - (TX-Dallas)               -3.90102 0.468 24471  -8.329  <.0001
##  (FL-Miami) - (WA-Seattle)              -2.14797 0.641 24471  -3.351  0.0768
##  (FL-Tampa) - (GA-Atlanta)               4.69564 0.456 24471  10.298  <.0001
##  (FL-Tampa) - (IL-Chicago)               0.02479 0.470 24471   0.053  1.0000
##  (FL-Tampa) - (MN-Minneapolis)           0.66933 0.662 24471   1.011  0.9999
##  (FL-Tampa) - (NC-Charlotte)            -6.97736 0.516 24471 -13.531  <.0001
##  (FL-Tampa) - (NV-Las Vegas)             3.82165 0.390 24471   9.794  <.0001
##  (FL-Tampa) - (NY-New York)              3.78592 0.483 24471   7.844  <.0001
##  (FL-Tampa) - (OH-Cleveland)             3.15515 0.539 24471   5.853  <.0001
##  (FL-Tampa) - (OR-Portland)              4.47529 0.636 24471   7.032  <.0001
##  (FL-Tampa) - (TX-Dallas)                0.14191 0.528 24471   0.269  1.0000
##  (FL-Tampa) - (WA-Seattle)               1.89496 0.686 24471   2.763  0.3368
##  (GA-Atlanta) - (IL-Chicago)            -4.67085 0.486 24471  -9.615  <.0001
##  (GA-Atlanta) - (MN-Minneapolis)        -4.02631 0.674 24471  -5.975  <.0001
##  (GA-Atlanta) - (NC-Charlotte)         -11.67301 0.530 24471 -22.006  <.0001
##  (GA-Atlanta) - (NV-Las Vegas)          -0.87399 0.410 24471  -2.134  0.7963
##  (GA-Atlanta) - (NY-New York)           -0.90972 0.498 24471  -1.825  0.9372
##  (GA-Atlanta) - (OH-Cleveland)          -1.54049 0.553 24471  -2.785  0.3227
##  (GA-Atlanta) - (OR-Portland)           -0.22035 0.648 24471  -0.340  1.0000
##  (GA-Atlanta) - (TX-Dallas)             -4.55373 0.542 24471  -8.394  <.0001
##  (GA-Atlanta) - (WA-Seattle)            -2.80068 0.697 24471  -4.019  0.0074
##  (IL-Chicago) - (MN-Minneapolis)         0.64454 0.683 24471   0.943  1.0000
##  (IL-Chicago) - (NC-Charlotte)          -7.00215 0.542 24471 -12.914  <.0001
##  (IL-Chicago) - (NV-Las Vegas)           3.79686 0.425 24471   8.941  <.0001
##  (IL-Chicago) - (NY-New York)            3.76114 0.511 24471   7.361  <.0001
##  (IL-Chicago) - (OH-Cleveland)           3.13036 0.565 24471   5.545  <.0001
##  (IL-Chicago) - (OR-Portland)            4.45051 0.658 24471   6.762  <.0001
##  (IL-Chicago) - (TX-Dallas)              0.11712 0.554 24471   0.211  1.0000
##  (IL-Chicago) - (WA-Seattle)             1.87017 0.706 24471   2.649  0.4169
##  (MN-Minneapolis) - (NC-Charlotte)      -7.64669 0.716 24471 -10.686  <.0001
##  (MN-Minneapolis) - (NV-Las Vegas)       3.15232 0.631 24471   4.994  0.0001
##  (MN-Minneapolis) - (NY-New York)        3.11659 0.692 24471   4.502  0.0009
##  (MN-Minneapolis) - (OH-Cleveland)       2.48582 0.733 24471   3.393  0.0677
##  (MN-Minneapolis) - (OR-Portland)        3.80596 0.807 24471   4.716  0.0003
##  (MN-Minneapolis) - (TX-Dallas)         -0.52742 0.725 24471  -0.728  1.0000
##  (MN-Minneapolis) - (WA-Seattle)         1.22563 0.846 24471   1.448  0.9938
##  (NC-Charlotte) - (NV-Las Vegas)        10.79902 0.475 24471  22.729  <.0001
##  (NC-Charlotte) - (NY-New York)         10.76329 0.554 24471  19.443  <.0001
##  (NC-Charlotte) - (OH-Cleveland)        10.13252 0.603 24471  16.793  <.0001
##  (NC-Charlotte) - (OR-Portland)         11.45266 0.692 24471  16.556  <.0001
##  (NC-Charlotte) - (TX-Dallas)            7.11927 0.594 24471  11.994  <.0001
##  (NC-Charlotte) - (WA-Seattle)           8.87232 0.737 24471  12.032  <.0001
##  (NV-Las Vegas) - (NY-New York)         -0.03573 0.439 24471  -0.081  1.0000
##  (NV-Las Vegas) - (OH-Cleveland)        -0.66650 0.500 24471  -1.332  0.9976
##  (NV-Las Vegas) - (OR-Portland)          0.65364 0.604 24471   1.082  0.9998
##  (NV-Las Vegas) - (TX-Dallas)           -3.67974 0.489 24471  -7.532  <.0001
##  (NV-Las Vegas) - (WA-Seattle)          -1.92669 0.656 24471  -2.938  0.2316
##  (NY-New York) - (OH-Cleveland)         -0.63077 0.575 24471  -1.096  0.9998
##  (NY-New York) - (OR-Portland)           0.68937 0.668 24471   1.033  0.9999
##  (NY-New York) - (TX-Dallas)            -3.64402 0.565 24471  -6.448  <.0001
##  (NY-New York) - (WA-Seattle)           -1.89097 0.715 24471  -2.646  0.4193
##  (OH-Cleveland) - (OR-Portland)          1.32014 0.709 24471   1.861  0.9260
##  (OH-Cleveland) - (TX-Dallas)           -3.01324 0.614 24471  -4.907  0.0001
##  (OH-Cleveland) - (WA-Seattle)          -1.26019 0.754 24471  -1.671  0.9719
##  (OR-Portland) - (TX-Dallas)            -4.33339 0.701 24471  -6.181  <.0001
##  (OR-Portland) - (WA-Seattle)           -2.58034 0.826 24471  -3.123  0.1467
##  (TX-Dallas) - (WA-Seattle)              1.75305 0.746 24471   2.350  0.6465
## 
## P value adjustment: tukey method for comparing a family of 18 estimates

Fig 1:

# Boxplot, using median house value for geographical city
ggplot(combined, aes(x = city, y = house_value)) +
  geom_boxplot(fill = "darkred", color = "gray30") +
  labs(title = "House Value by City",
       x = "City",
       y = "House Value (Index)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Creating Summary Statistics
combined %>%
  group_by(city) %>%
  summarise(
    mean_house = mean(house_value, na.rm = TRUE),
    median_house = median(house_value, na.rm = TRUE),
    sd_house = sd(house_value, na.rm = TRUE),
    q1 = quantile(house_value, 0.25, na.rm = TRUE),
    q3 = quantile(house_value, 0.75, na.rm = TRUE),
    iqr = IQR(house_value, na.rm = TRUE),
    .groups = 'drop'
  )
## # A tibble: 18 × 7
##    city             mean_house median_house sd_house    q1    q3    iqr
##    <chr>                 <dbl>        <dbl>    <dbl> <dbl> <dbl>  <dbl>
##  1 AZ-Phoenix            179.         173.     24.7  167.  201.   34.9 
##  2 CA-Los Angeles        165.         152.     33.1  137.  203.   65.9 
##  3 CA-San Diego           81.5         80.1     5.59  76.8  84.5   7.61
##  4 CA-San Francisco      240.         219.     50.7  198.  302.  104.  
##  5 CO-Denver             189.         167.     51.7  146.  236.   90.1 
##  6 DC-Washington         140.         133.     21.9  120.  163.   43.3 
##  7 FL-Miami              190.         173.     39.5  156.  234.   78.5 
##  8 FL-Tampa              147.         147.     24.0  127.  172.   45.1 
##  9 GA-Atlanta            149.         132.     44.7  109.  192.   83.0 
## 10 IL-Chicago            118.         117.      8.81 111.  126.   14.6 
## 11 MN-Minneapolis         89.4         87.8    18.2   71.3 100.   29.1 
## 12 NC-Charlotte          215.         193.     52.4  175.  286.  111.  
## 13 NV-Las Vegas          167.         153.     37.9  137.  201.   64.5 
## 14 NY-New York           198.         178.     45.6  166.  241.   75.0 
## 15 OH-Cleveland          159.         143.     50.9  114.  215.  100.  
## 16 OR-Portland           141.         139.     17.7  125.  158.   32.2 
## 17 TX-Dallas             130.         128.      5.13 125.  135.    9.57
## 18 WA-Seattle            161.         158.     12.4  152.  171.   18.9
ggsave("fig1.png")
## Saving 7 x 5 in image

Fig 2

# Running Liner Model for Each City

results <- combined %>%
  group_by(city) %>%
  do({
    mod <- lm(house_value ~ mortgage_value, data = .)
    tibble(
      shapiro_p = if(nrow(.) >= 3 & nrow(.) <= 5000) shapiro.test(residuals(mod))$p.value else NA, 
    )
  }) %>% ungroup()

print(results)
## # A tibble: 18 × 2
##    city             shapiro_p
##    <chr>                <dbl>
##  1 AZ-Phoenix        1.56e-20
##  2 CA-Los Angeles    4.68e-25
##  3 CA-San Diego      1.48e-29
##  4 CA-San Francisco  4.60e-21
##  5 CO-Denver         6.06e-33
##  6 DC-Washington     2.38e-23
##  7 FL-Miami          4.87e-21
##  8 FL-Tampa          3.00e-24
##  9 GA-Atlanta        1.57e-28
## 10 IL-Chicago        4.26e-25
## 11 MN-Minneapolis    9.02e-28
## 12 NC-Charlotte      3.98e-26
## 13 NV-Las Vegas      8.39e-24
## 14 NY-New York       9.86e-21
## 15 OH-Cleveland      2.82e-32
## 16 OR-Portland       7.31e-18
## 17 TX-Dallas         2.78e-27
## 18 WA-Seattle        6.37e-14
# Dignostics for San Francisco
mod <- lm(house_value ~ mortgage_value, data = filter(combined, city =="CA-San Francisco"))
plot(mod)

summary(mod)
## 
## Call:
## lm(formula = house_value ~ mortgage_value, data = filter(combined, 
##     city == "CA-San Francisco"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -81.724 -28.181  -6.377  18.071 115.854 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    215.4733     1.5491  139.09   <2e-16 ***
## mortgage_value  26.7542     0.9956   26.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39.47 on 1045 degrees of freedom
##   (793 observations deleted due to missingness)
## Multiple R-squared:  0.4086, Adjusted R-squared:  0.4081 
## F-statistic: 722.1 on 1 and 1045 DF,  p-value: < 2.2e-16
print(mod)
## 
## Call:
## lm(formula = house_value ~ mortgage_value, data = filter(combined, 
##     city == "CA-San Francisco"))
## 
## Coefficients:
##    (Intercept)  mortgage_value  
##         215.47           26.75
# Dignostics for Los Angeles
mod1 <- lm(house_value ~ mortgage_value, data = filter(combined, city =="CA-Los Angeles"))
plot(mod1)

summary(mod1)
## 
## Call:
## lm(formula = house_value ~ mortgage_value, data = filter(combined, 
##     city == "CA-Los Angeles"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -53.882 -27.010  -6.538  31.512  64.344 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    178.4931     1.1263  158.48   <2e-16 ***
## mortgage_value  -5.8718     0.3505  -16.75   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30.19 on 1378 degrees of freedom
##   (460 observations deleted due to missingness)
## Multiple R-squared:  0.1692, Adjusted R-squared:  0.1686 
## F-statistic: 280.7 on 1 and 1378 DF,  p-value: < 2.2e-16
print(mod1)
## 
## Call:
## lm(formula = house_value ~ mortgage_value, data = filter(combined, 
##     city == "CA-Los Angeles"))
## 
## Coefficients:
##    (Intercept)  mortgage_value  
##        178.493          -5.872

Fig 2 and 3

# Facuet Grid for Linear Model
ggplot(combined, aes(x = mortgage_value, y = house_value)) +
  geom_point(alpha = 0.4, color = "steelblue") +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  facet_wrap(~ city, scales = "free_y", ncol = 4) +
  labs(
    title = "Mortgage Performance vs. House Value by City",
    x = "Mortgage Performance",
    y = "House Value (Index)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8613 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 8613 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("fig2.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8613 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 8613 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Summary Statistics 
combined %>%
  group_by(city) %>%
  summarise(
    mean_mortgage = mean(mortgage_value, na.rm = TRUE),
    mean_house = mean(house_value, na.rm = TRUE),
    cor_house_mortgage = cor(house_value, mortgage_value),
    n = n()
  )
## # A tibble: 18 × 5
##    city             mean_mortgage mean_house cor_house_mortgage     n
##    <chr>                    <dbl>      <dbl>              <dbl> <int>
##  1 AZ-Phoenix               2.85       179.                  NA  1840
##  2 CA-Los Angeles           2.22       165.                  NA  1840
##  3 CA-San Diego             1.77        81.5                 NA  1840
##  4 CA-San Francisco         0.959      240.                  NA  1840
##  5 CO-Denver                2.28       189.                  NA  1840
##  6 DC-Washington            3.00       140.                  NA  1840
##  7 FL-Miami                 4.44       190.                  NA  1840
##  8 FL-Tampa                 3.41       147.                  NA  1840
##  9 GA-Atlanta               3.69       149.                  NA  1840
## 10 IL-Chicago               3.20       118.                  NA  1840
## 11 MN-Minneapolis           1.94        89.4                 NA  1840
## 12 NC-Charlotte             3.06       215.                  NA  1840
## 13 NV-Las Vegas             3.61       167.                  NA  1840
## 14 NY-New York              2.70       198.                  NA  1840
## 15 OH-Cleveland             3.52       159.                  NA  1840
## 16 OR-Portland              1.91       141.                  NA  1840
## 17 TX-Dallas                2.75       130.                  NA  1840
## 18 WA-Seattle               1.56       161.                  NA  1840
# Comparison of San Fransico and Los Angeles (both in the state of California)
combined %>%
  filter(city %in% c("CA-San Francisco", "CA-Los Angeles")) %>%
  ggplot(aes(x = mortgage_value, y = house_value, color = city)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "San Francisco vs Los Angeles House Value Trends",
    x = "Mortgage Performance",
    y = "House Value"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1253 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1253 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("fig3.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1253 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 1253 rows containing missing values or values outside the scale range
## (`geom_point()`).